%pip install -q transformers torch matplotlib scipy prettytable

Python interpreter will be restarted.
Python interpreter will be restarted.

import functools
import textwrap

import matplotlib.pyplot as plt
import numpy as np
import torch
from prettytable import PrettyTable
from scipy.spatial.distance import cosine
from transformers import BertTokenizer, BertModel

# ------------------------------ Configuration ------------------------------
# Maximum number of characters per line when sentence text is wrapped for
# display in the results table (default width of wrap_text). Lower it for
# better viewing on smaller screens.
TABLE_WRAP_WIDTH = 50
# ---------------------------------------------------------------------------

# Function to create embeddings using a specified model
@functools.lru_cache(maxsize=None)
def _load_tokenizer_and_model(model_name):
    """Load the tokenizer and encoder for *model_name*, once per name.

    Loading a checkpoint is by far the most expensive step here, and the
    script embeds several sentences with the same model, so the loaded pair
    is cached and reused on subsequent calls.
    """
    tokenizer = BertTokenizer.from_pretrained(model_name)
    model = BertModel.from_pretrained(model_name)
    return tokenizer, model


def create_embedding(text, model_name):
    """Embed *text* with the BERT-style checkpoint named *model_name*.

    Args:
        text: Sentence (or list of sentences) to encode. Input longer than
            512 tokens is truncated.
        model_name: Hugging Face model identifier or local path understood
            by ``from_pretrained``.

    Returns:
        A NumPy array holding, for each input sequence, the final-layer
        hidden state at token position 0 (the ``[CLS]`` position). For a
        single sentence this has one row.
    """
    tokenizer, model = _load_tokenizer_and_model(model_name)

    inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)

    # Inference only: skip gradient tracking so .numpy() works on the output
    # and no autograd graph is kept alive.
    with torch.no_grad():
        outputs = model(**inputs)

    # Keep only the first token's vector of every sequence in the batch.
    cls_embedding = outputs.last_hidden_state[:, 0, :].numpy()
    return cls_embedding

# Function to calculate accuracy based on cosine similarity
def calculate_accuracy(embedding1, embedding2):
    """Return the cosine similarity between two embedding vectors.

    Args:
        embedding1: First embedding; a 1-D vector or an array with extra
            singleton axes such as shape ``(1, hidden)``.
        embedding2: Second embedding, same length as the first.

    Returns:
        ``1 - cosine_distance``: 1.0 for vectors pointing the same way,
        0.0 for orthogonal vectors, -1.0 for opposite vectors.

    Raises:
        ValueError: If an input still has more than one dimension after
            singleton axes are removed (raised by SciPy).
    """
    # scipy's cosine() is specified for 1-D input and recent releases reject
    # anything else, while embeddings here arrive with a leading batch axis
    # of size 1. Drop singleton axes explicitly instead of relying on SciPy.
    vector1 = np.atleast_1d(np.squeeze(np.asarray(embedding1)))
    vector2 = np.atleast_1d(np.squeeze(np.asarray(embedding2)))

    similarity = 1 - cosine(vector1, vector2)
    return similarity

# Function to plot the accuracies of both models
def plot_accuracy(bert_accuracy, biobert_accuracy):
    """Display a two-bar chart comparing the BERT and BioBERT scores.

    Args:
        bert_accuracy: Similarity score obtained with BERT.
        biobert_accuracy: Similarity score obtained with BioBERT.
    """
    model_labels = ['BERT', 'BioBERT']
    scores = [bert_accuracy, biobert_accuracy]
    bar_colors = ['blue', 'orange']

    plt.figure(figsize=(8, 6))

    # One bar per model, plus a dashed reference line at 0.5.
    plt.bar(model_labels, scores, color=bar_colors)
    plt.axhline(y=0.5, color='gray', linestyle='--', label='Random Guess')

    # Titles, axis range and decorations.
    plt.title('RAG Accuracy Comparison between BERT and BioBERT')
    plt.ylabel('Cosine Similarity (Accuracy)')
    plt.ylim(0, 1)
    plt.legend()
    plt.grid()

    plt.show()

# Helper to wrap long text
def wrap_text(text, width=TABLE_WRAP_WIDTH):
    """Return *text* re-flowed into newline-separated lines of at most *width* characters."""
    wrapped = textwrap.fill(text, width=width)
    return wrapped

# The two sentences whose embeddings are compared: one about international
# business operations, one about diabetes and cardiovascular disease.
sentence1 = "Manage and organize feedback from various languages in international operations."
sentence2 = "The association between poor glycemic control, oxidative stress, insulin resistance, and of low-grade inflammation have been suggested as putative factors linking diabetes and cardiovascular disease."

# Embed both sentences with each model (BERT first, then BioBERT)
bert_embedding1, bert_embedding2 = (
    create_embedding(sentence, 'bert-base-uncased')
    for sentence in (sentence1, sentence2)
)
biobert_embedding1, biobert_embedding2 = (
    create_embedding(sentence, 'dmis-lab/biobert-v1.1')
    for sentence in (sentence1, sentence2)
)

# Cosine similarity between the two sentence embeddings, per model
bert_accuracy = calculate_accuracy(bert_embedding1, bert_embedding2)
biobert_accuracy = calculate_accuracy(biobert_embedding1, biobert_embedding2)

# Build the results table: one left-aligned row per model
table = PrettyTable()
table.field_names = ["Model", "Sentence 1", "Sentence 2", "Cosine Similarity"]
table.align = "l"

for model_label, score in (("BERT", bert_accuracy), ("BioBERT", biobert_accuracy)):
    table.add_row([
        model_label,
        wrap_text(sentence1),
        wrap_text(sentence2),
        f"{score:.4f}",
    ])

# Show the table under its heading
print("\n--- Similarity Accuracy ---\n")
print(table)

# Draw the bar chart of both scores
plot_accuracy(bert_accuracy, biobert_accuracy)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).

--- Similarity Accuracy ---

+---------+-------------------------------------------+----------------------------------------------------+-------------------+
| Model   | Sentence 1                                | Sentence 2                                         | Cosine Similarity |
+---------+-------------------------------------------+----------------------------------------------------+-------------------+
| BERT    | Manage and organize feedback from various | The association between poor glycemic control,     | 0.5931            |
|         | languages in international operations.    | oxidative stress, insulin resistance, and of low-  |                   |
|         |                                           | grade inflammation have been suggested as putative |                   |
|         |                                           | factors linking diabetes and cardiovascular        |                   |
|         |                                           | disease.                                           |                   |
| BioBERT | Manage and organize feedback from various | The association between poor glycemic control,     | 0.7665            |
|         | languages in international operations.    | oxidative stress, insulin resistance, and of low-  |                   |
|         |                                           | grade inflammation have been suggested as putative |                   |
|         |                                           | factors linking diabetes and cardiovascular        |                   |
|         |                                           | disease.                                           |                   |
+---------+-------------------------------------------+----------------------------------------------------+-------------------+

Domain-Specific LLM Embedding Accuracy Evaluation

The two models used in this experiment are BERT and BioBERT